home *** CD-ROM | disk | FTP | other *** search
/ Amiga Plus 1997 #3 / Amiga Plus CD - 1997 - No. 03.iso / pd / programmierung / vbcc / loop.c < prev    next >
C/C++ Source or Header  |  1997-01-29  |  59KB  |  1,520 lines

  1. /*  $VER: vbcc (loop.c) V0.4     */
  2. /*  schleifenorientierte Optimierungen  */
  3.  
  4. #include "opt.h"
  5.  
  6. static char FILE_[]=__FILE__;
  7.  
  8. #define MOVE_IC 1
  9. #define MOVE_COMP 2
  10.  
  11. /*  Liste, in die ICs eingetragen werden, die aus Schleifen */
  12. /*  gezogen werden sollen.                                  */
  13. struct movlist{
  14.     struct movlist *next;
  15.     struct IC *IC;
  16.     struct flowgraph *target_fg;
  17.     int flags;
  18. };
  19.  
  20. struct movlist *first_mov,*last_mov;
  21.  
  22. int report_weird_code,report_suspicious_loops;
  23.  
  24. /*  Bitvektoren fuer schleifeninvariante ICs    */
  25. unsigned char *invariant,*inloop,*moved,*moved_completely;
  26. unsigned char *fg_tmp;
  27. unsigned char *not_movable;
  28. size_t bsize;
  29.  
  30.  
  31. /*  Liste, in die ICs fuer strength-reduction eingetragen   */
  32. /*  werden.                                                 */
  33. struct srlist{
  34.     struct srlist *next;
  35.     struct IC *ind_var;
  36.     struct IC *IC;
  37.     struct flowgraph *target_fg;
  38.     /*  Hilfsvariable, falls eine aequivalente Operation schon reduziert    */
  39.     /*  wurde.                                                              */
  40.     struct Var *hv;
  41. };
  42.  
  43. struct srlist *first_sr,*last_sr;
  44.  
  45. /*  Liste, in die Daten fuer loop-unrolling eingetragen werden. */
  46. struct urlist{
  47.     int flags;
  48.     long total,unroll;
  49.     struct IC *cmp,*branch,*ind;
  50.     struct flowgraph *start,*head;
  51.     struct urlist *next;
  52. } *first_ur;
  53.  
  54. #define UNROLL_COMPLETELY 1
  55. #define UNROLL_MODULO 2
  56. #define UNROLL_INVARIANT 3
  57.  
  58. /*  Hier werden Induktionsvariablen vermerkt    */
  59. struct IC **ind_vars;
  60.  
  61. static struct flowgraph *first_fg;
  62.  
  63. int loops(struct flowgraph *fg,int footers)
  64. /*  kennzeichnet Schleifen im Flussgraph; wenn footers!=0 werden darf eine  */
  65. /*  Schleife nur einen gemeinsamen Austrittspunkt haben                     */
  66. {
  67.     int i,start,end,c=0;struct flowlist *lp;struct flowgraph *g,*loopend;
  68.     if(DEBUG&1024) printf("searching loops\n");
  69.     g=fg;
  70.     while(g){
  71.         start=g->index;
  72.         end=-1;
  73.         for(lp=g->in;lp;lp=lp->next){
  74.             if(!lp->graph) continue;
  75.             if(lp->graph->branchout==g||!lp->graph->end||lp->graph->end->code!=BRA){
  76.                 i=lp->graph->index;
  77.                 if(i>=start&&i>end){ end=i;loopend=lp->graph; }
  78.             }
  79.         }
  80.         if(end>=0){
  81.         /*  Schleife oder etwas aehnliches  */
  82.             struct flowgraph *p=g;
  83.             if(DEBUG&1024) printf("found possible loop from blocks %d to %d\n",start,end);
  84.             if(1/*goto_used*/){
  85.                 if(DEBUG&1024) printf("have to check...\n");
  86.                 do{
  87.                     if(!p||p->index>end) break;
  88.  
  89.                     /*  testen, ob aus der Schleife gesprungen wird */
  90.                     if(p->branchout&&footers){
  91.                         i=p->branchout->index;
  92.                         if(i<start){
  93.                             end=-1;
  94.                             break;
  95.                         }
  96.                         if(i>end&&(DEBUG&1024)){
  97.                             puts("jump out of loop");
  98.                             if(p->branchout!=loopend->normalout){
  99.                                 puts("no break");
  100.                                 if(p->branchout->start->typf!=return_label) puts("no return");
  101.                             }
  102.                         }
  103.                         if(i>end&&p->branchout!=loopend->normalout&&p->branchout->start->typf!=return_label){
  104.                         /*  Sprung zu anderem als dem normalen Austritt oder return */
  105.                             end=-1;
  106.                             break;
  107.                         }
  108.                     }
  109.                     /*  testen, ob in die Schleife gesprungen wird  */
  110.                     if(p!=g){
  111.                         for(lp=p->in;lp;lp=lp->next){
  112.                             if(!lp->graph) continue;
  113.                             if(lp->graph->branchout==p){
  114.                                 i=lp->graph->index;
  115.                                 if(i<start){
  116.                                     if(report_weird_code){error(175);report_weird_code=0;}
  117.                                     end=-1;
  118.                                     break;
  119.                                 }
  120.                                 if(i>end){
  121.                                     end=-1;
  122.                                     break;
  123.                                 }
  124.                             }
  125.                         }
  126.                     }
  127.                     if(p->index==end) break;
  128.                     p=p->normalout;
  129.                 }while(end>=0);
  130.             }else{
  131.                 if(DEBUG&1024) printf("must be a loop, because there was no goto\n");
  132.             }
  133.         }
  134.         if(end>=0){
  135.             if(DEBUG&1024) printf("confirmed that it is a loop\n");
  136.             g->loopend=loopend;
  137.             c++;
  138.         }
  139.         g=g->normalout;
  140.     }
  141.     return(c);
  142. }
  143.  
  144. struct flowgraph *create_loop_headers(struct flowgraph *fg,int av)
  145. /*  Fuegt vor jede Schleife einen Kopf-Block ein, wenn noetig.  */
  146. /*  Wenn av!=0 werden aktive Variablen korrekt uebertragen und  */
  147. /*  diverse Registerlisten uebernommen und index=-1 gesetzt.    */
  148. /*  Kann einen Block mehrmals in der ->in Liste eintragen       */
  149. {
  150.     struct flowgraph *g,*last,*new,*rg=fg;
  151.     struct IC *lic;
  152.     if(DEBUG&1024) printf("creating loop-headers\n");
  153.     g=fg;last=0;
  154.     while(g){
  155.         new=0;
  156.         if(g->loopend){
  157.             if(!last){
  158.                 struct flowlist *lp;
  159.                 new=mymalloc(sizeof(struct flowgraph));
  160.                 rg=new;
  161.                 new->in=0;
  162.                 new->start=new->end=0;
  163.                 lp=mymalloc(sizeof(struct flowlist));
  164.                 lp->graph=new;
  165.                 lp->next=g->in;
  166.                 g->in=lp;
  167.             }else{
  168.                 struct flowlist *lp,*nl,**ls;
  169.                 new=mymalloc(sizeof(struct flowgraph));
  170.                 last->normalout=new;
  171.                 lic=mymalloc(ICS);
  172.                 lic->line=0;
  173.                 lic->file=0;
  174.                 new->start=new->end=lic;
  175.                 lic->code=LABEL;
  176.                 lic->typf=++label;
  177.                 lic->q1.flags=lic->q2.flags=lic->z.flags=0;
  178.                 lic->q1.am=lic->q2.am=lic->z.am=0;
  179.                 lic->use_cnt=lic->change_cnt=0;
  180.                 lic->use_list=lic->change_list=0;
  181.                 last->end->next=lic;    /*  kann nicht leer sein, da sonst branchout nicht 0 waere  */
  182.                 lic->prev=last->end;
  183.                 g->start->prev=lic;
  184.                 lic->next=g->start;
  185.                 lp=g->in;ls=&new->in;
  186.                 while(lp){
  187.                     if(lp->graph&&lp->graph->index<g->index){
  188.                     /*  Eintritt von oben soll in den Kopf  */
  189.                         nl=mymalloc(sizeof(struct flowlist));
  190.                         nl->graph=lp->graph;
  191.                         nl->next=0;
  192.                         (*ls)=nl;
  193.                         ls=&nl->next;
  194.                         if(lp->graph->branchout==g){
  195.                             if(DEBUG&1024) printf("changing branch\n");
  196.                             if(lp->graph->end->code<BEQ||lp->graph->end->code>BRA) ierror(0);
  197.                             lp->graph->end->typf=lic->typf;
  198.                             lp->graph->branchout=new;
  199.                         }
  200.                         lp->graph=new;
  201.                     }
  202.                     lp=lp->next;
  203.                 }
  204.                 if(!new->in) ierror(0);
  205.             }
  206.             if(new){
  207.                 if(DEBUG&1024) printf("must insert loop-header before block %d\n",g->index);
  208.                 basic_blocks++;
  209.                 new->branchout=0;
  210.                 new->loopend=0;
  211.                 if(av)
  212.                     new->index=-1;
  213.                 else
  214.                     new->index=basic_blocks;
  215.                 new->normalout=g;
  216.                 new->calls=0;
  217.                 new->loop_calls=0;
  218.                 new->rd_in=new->rd_out=new->rd_kill=new->rd_gen=0;
  219.                 new->ae_in=new->ae_out=new->ae_kill=new->ae_gen=0;
  220.                 new->cp_in=new->cp_out=new->cp_kill=new->cp_gen=0;
  221.                 if(!av){
  222.                     new->av_in=new->av_out=new->av_kill=new->av_gen=0;
  223.                 }else{
  224.                     new->av_in=mymalloc(vsize);
  225.                     new->av_out=mymalloc(vsize);
  226.                     new->av_gen=mymalloc(vsize);
  227.                     new->av_kill=mymalloc(vsize);
  228.                     memset(new->av_gen,0,vsize);
  229.                     memset(new->av_kill,0,vsize);
  230.                     memcpy(new->av_out,g->av_in,vsize);
  231.                     memcpy(new->av_in,g->av_in,vsize);
  232.                     memcpy(&new->regv,&g->regv,sizeof(new->regv));
  233.                     memcpy(&new->regused,&g->regused,sizeof(new->regused));
  234.                 }
  235.             }
  236.         }
  237.         last=g;
  238.         g=g->normalout;
  239.     }
  240.     return(rg);
  241. }
  242. struct flowgraph *create_loop_footers(struct flowgraph *fg,int av)
  243. /*  Fuegt hinter jede Schleife einen Fuss-Block ein, wenn noetig.   */
  244. /*  Wenn av!=0 werden aktive Variablen korrekt uebertragen und      */
  245. /*  diverse Registerlisten uebernommen und index auf -2 gesetzt.    */
  246. {
  247.     struct flowgraph *g,*loopend,*out,*new;
  248.     struct IC *lic;
  249.     if(DEBUG&1024) printf("creating loop-footers\n");
  250.     g=fg;
  251.     while(g){
  252.         new=0;
  253.         loopend=g->loopend;
  254.         if(loopend){
  255.             struct flowlist *lp,*nl,**ls;
  256.             out=loopend->normalout;
  257.             new=mymalloc(sizeof(struct flowgraph));
  258.             new->normalout=out;
  259.             loopend->normalout=new;
  260.             lic=mymalloc(ICS);
  261.             lic->line=0;
  262.             lic->file=0;
  263.             new->start=new->end=lic;
  264.             lic->code=LABEL;
  265.             lic->typf=++label;
  266.             lic->q1.flags=lic->q2.flags=lic->z.flags=0;
  267.             lic->q1.am=lic->q2.am=lic->z.am=0;
  268.             lic->use_cnt=lic->change_cnt=0;
  269.             lic->use_list=lic->change_list=0;
  270.             if(out) lp=out->in; else {lp=0;new->in=0;}
  271.             ls=&new->in;
  272.             while(lp){
  273.                 if(lp->graph&&lp->graph->index<=loopend->index&&lp->graph->index>=g->index){
  274.                 /*  Austritt aus Schleife soll in den Fuss  */
  275.                     nl=mymalloc(sizeof(struct flowlist));
  276.                     nl->graph=lp->graph;
  277.                     nl->next=0;
  278.                     (*ls)=nl;
  279.                     ls=&nl->next;
  280.                     if(lp->graph->branchout==out){
  281.                         if(DEBUG&1024) printf("changing branch\n");
  282.                         if(lp->graph->end->code<BEQ||lp->graph->end->code>BRA) ierror(0);
  283.                         lp->graph->end->typf=lic->typf;
  284.                         lp->graph->branchout=new;
  285.                     }
  286.                     lp->graph=new;
  287.                 }
  288.                 lp=lp->next;
  289.             }
  290.             if(out&&!new->in) ierror(0);
  291.             if(DEBUG&1024) printf("must insert loop-footer after block %d\n",loopend->index);
  292.             basic_blocks++;
  293.             new->branchout=0;
  294.             new->loopend=0;
  295.             if(av)
  296.                 new->index=-2;
  297.             else
  298.                 new->index=basic_blocks;
  299.             new->normalout=out;
  300.             new->calls=0;
  301.             new->loop_calls=0;
  302.             new->rd_in=new->rd_out=new->rd_kill=new->rd_gen=0;
  303.             new->ae_in=new->ae_out=new->ae_kill=new->ae_gen=0;
  304.             new->cp_in=new->cp_out=new->cp_kill=new->cp_gen=0;
  305.             if(!av){
  306.                 new->av_in=new->av_out=new->av_kill=new->av_gen=0;
  307.             }else{
  308.                 new->av_in=mymalloc(vsize);
  309.                 new->av_out=mymalloc(vsize);
  310.                 new->av_kill=mymalloc(vsize);
  311.                 new->av_gen=mymalloc(vsize);
  312.                 memset(new->av_gen,0,vsize);
  313.                 memset(new->av_kill,0,vsize);
  314.                 if(out){
  315.                     memcpy(new->av_out,out->av_in,vsize);
  316.                     memcpy(new->av_in,out->av_in,vsize);
  317.                 }else{
  318.                     memset(new->av_in,0,vsize);
  319.                     memset(new->av_out,0,vsize);
  320.                 }
  321.                 memcpy(&new->regv,&g->regv,sizeof(new->regv));
  322.                 memcpy(&new->regused,&g->regused,sizeof(new->regused));
  323.             }
  324.             insert_IC_fg(new,loopend->end,lic);
  325.         }
  326.         g=g->normalout;
  327.     }
  328.     return(fg);
  329. }
  330. void add_movable(struct IC *p,struct flowgraph *fg,int flags)
  331. /*  Fuegt IC p, das aus der Schleife in Block fg mit Flags flags    */
  332. /*  verschoben werden darf in eine Liste.                           */
  333. {
  334.     struct movlist *new=mymalloc(sizeof(*new));
  335.     new->IC=p;
  336.     new->target_fg=fg;
  337.     new->flags=flags;
  338.     new->next=0;
  339.     if(last_mov){
  340.         last_mov->next=new;
  341.         last_mov=new;
  342.     }else{
  343.         first_mov=last_mov=new;
  344.     }
  345.     BSET(moved,p->defindex);
  346.     if(flags==MOVE_IC) BSET(moved_completely,p->defindex);
  347. }
  348. int move_to_head(void)
  349. /*  Geht die Liste mit verschiebbaren ICs durch und schiebt die ICs */
  350. /*  in den Vorkopf der Schleife. Ausserdem wird die Liste           */
  351. /*  freigegeben.                                                    */
  352. /*  Der Rueckgabewert hat Bit 1 gesetzt, wenn ICs ganz verschoben   */
  353. /*  wurden und Bit 2, falls eine Berechnung mit Hilfsvariable vor   */
  354. /*  die Schleife gezogen wurde.                                     */
  355. {
  356.     struct IC **fglist; /* Letztes IC vor jedem Block   */
  357.     struct flowgraph *g;struct IC *p;struct movlist *m;
  358.     int changed=0;
  359.  
  360.     if(!first_mov) return(0);
  361.  
  362.     if(DEBUG&1024) printf("moving the ICs out of the loop\n");
  363.  
  364.     fglist=mymalloc((basic_blocks+1)*sizeof(*fglist));
  365.     p=0;
  366.     for(g=first_fg;g;g=g->normalout){
  367.         if(g->index>basic_blocks) ierror(0);
  368.         if(g->end) p=g->end;
  369.         fglist[g->index]=p;
  370.     }
  371.     while(first_mov){
  372.         p=first_mov->IC;
  373.         g=first_mov->target_fg;
  374.         if(first_mov->flags==MOVE_IC){
  375.             if(DEBUG&1024) {printf("moving IC out of loop:\n");pric2(stdout,p);}
  376.             if(!p->prev||!p->next) ierror(0);
  377.             p->next->prev=p->prev;
  378.             p->prev->next=p->next;
  379.             insert_IC_fg(g,fglist[g->index],p);
  380.             fglist[g->index]=p;
  381.             changed|=1;
  382.         }else{
  383.             struct Typ *t=mymalloc(TYPS);
  384.             struct IC *new=mymalloc(ICS);
  385.             struct Var *v;
  386.             if(DEBUG&1024) {printf("moving computation out of loop:\n");pric2(stdout,p);}
  387.             if(p->code==ADDRESS||p->code==ADDI2P||p->code==SUBIFP) t->flags=POINTER;
  388.                 else t->flags=p->typf;
  389.             if(p->code==COMPARE||p->code==TEST) t->flags=0;
  390.             if((t->flags&15)==POINTER){
  391.                 t->next=mymalloc(TYPS);
  392.                 t->next->flags=VOID;
  393.                 t->next->next=0;
  394.             }else t->next=0;
  395.             v=add_var(empty,t,AUTO,0);
  396.             *new=*p;
  397.             new->z.flags=VAR;
  398.             new->z.v=v;
  399.             new->z.val.vlong=l2zl(0L);
  400.             /*  Die neue Operation benutzt maximal, was die andere benutzte */
  401.             /*  und aendert nur die Hilfsvariable.                          */
  402.             if(have_alias){
  403.                 new->use_cnt=p->use_cnt;
  404.                 new->use_list=mymalloc(new->use_cnt*VLS);
  405.                 memcpy(new->use_list,p->use_list,new->use_cnt*VLS);
  406.                 new->change_cnt=1;
  407.                 new->change_list=mymalloc(VLS);
  408.                 new->change_list[0].v=v;
  409.                 new->change_list[0].flags=0;
  410.             }
  411.             insert_IC_fg(g,fglist[g->index],new);
  412.             fglist[g->index]=new;
  413.             p->code=ASSIGN;
  414.             p->typf=t->flags;
  415.             p->q1.flags=VAR;
  416.             p->q1.v=v;
  417.             p->q1.val.vlong=l2zl(0L);
  418.             p->q2.flags=0;
  419.             p->q2.val.vlong=szof(t);
  420.             /*  Die Operation in der Schleife benutzt nun zusaetzlich   */
  421.             /*  noch die Hilfsvariable.                                 */
  422.             if(have_alias){
  423.                 void *m=p->use_list;
  424.                 p->use_cnt++;
  425.                 p->use_list=mymalloc(p->use_cnt*VLS);
  426.                 memcpy(&p->use_list[1],m,(p->use_cnt-1)*VLS);
  427.                 free(m);
  428.                 p->use_list[p->use_cnt].v=v;
  429.                 p->use_list[p->use_cnt].flags=0;
  430.             }
  431.             changed|=2;
  432.         }
  433.         m=first_mov->next;
  434.         free(first_mov);
  435.         first_mov=m;
  436.     }
  437.     if(DEBUG&1024) print_flowgraph(first_fg);
  438.     free(fglist);
  439.     return(changed);
  440. }
  441. void calc_movable(struct flowgraph *start,struct flowgraph *end)
  442. /*  Berechnet, welche Definitionen nicht aus der Schleife start-end     */
  443. /*  verschoben werden duerfen. Eine Def. p von z darf nur verschoben    */
  444. /*  werden, wenn keine andere Def. von p existiert und alle             */
  445. /*  Verwendungen von z nur von p erreicht werden.                       */
  446. /*  Benutzt rd_defs, rd_tmp, rd_vars.                                   */
  447. {
  448.     struct flowgraph *g;struct IC *p;
  449.     int i,j,k,d;
  450.     unsigned char *changed_vars;
  451.     if(DEBUG&1024) printf("calculating not_movable for blocks %d to %d\n",start->index,end->index);
  452.     if(!(c_flags_val[0].l&1024)){
  453.         memset(not_movable,UCHAR_MAX,dsize);
  454.         return;
  455.     }
  456.     memset(not_movable,0,dsize);
  457.     changed_vars=mymalloc(vsize);
  458.     memset(changed_vars,0,vsize);
  459.     for(g=start;g;g=g->normalout){
  460.         if(!g->rd_in) ierror(0);
  461.         memcpy(rd_defs,g->rd_in,dsize);
  462.         for(p=g->start;p;p=p->next){
  463.             for(j=0;j<p->change_cnt;j++){
  464.                 i=p->change_list[j].v->index;
  465.                 if(p->change_list[j].flags&DREFOBJ) i+=vcount-rcount;
  466.                 if(i>=vcount) continue;
  467.                 if(BTST(changed_vars,i)){
  468.                     bvunite(not_movable,defs[i],dsize);
  469.                 }else{
  470.                     BSET(changed_vars,i);
  471.                 }
  472.             }
  473.             for(k=0;k<p->use_cnt;k++){
  474.                 i=p->use_list[k].v->index;
  475.                 if(p->use_list[k].flags&DREFOBJ) i+=vcount-rcount;
  476.                 if(i>=vcount) continue;
  477.                 if(BTST(rd_defs,i+dcount+1)){   /*  undefined->i ?  */
  478.                     bvunite(not_movable,defs[i],dsize);
  479.                 }else{
  480.                     memcpy(rd_tmp,rd_defs,dsize);
  481.                     bvdiff(rd_tmp,defs[i],dsize);
  482.                     for(d=-1,j=0;j<=dcount;j++){
  483.                         if(BTST(rd_defs,j)){
  484.                             if(d>=0){  /*  mehr als eine Def.  */
  485.                                 bvunite(not_movable,defs[i],dsize);
  486.                                 d=-1;break;
  487.                             }else d=j;
  488.                         }
  489.                     }
  490.                     if(d>=0){
  491.                         memcpy(rd_tmp,defs[i],dsize);
  492.                         BCLR(rd_tmp,j);
  493.                         bvunite(not_movable,rd_tmp,dsize);
  494.                     }
  495.                 }
  496.             }
  497.  
  498.             /*  Das hier, um rd_defs zu aktualisieren.  */
  499.             rd_change(p);
  500.             if(p==g->end) break;
  501.         }
  502.         if(g==end) break;
  503.     }
  504.     free(changed_vars);
  505. }
  506. int used_in_loop_only(struct flowgraph *start,struct flowgraph *end,struct obj *o)
  507. /*  Testet, ob Variable nur in der Schleife benutzt wird.               */
  508. /*  Z.Z. wird nur auf Hilfsvariablen getestet.                          */
  509. /*  Unbedingt aendern!                                                  */
  510. {
  511.     struct Var *v;struct flowgraph *g;struct IC *p;
  512.     if((o->flags&(VAR|DREFOBJ))!=VAR) return(0);
  513.     v=o->v;
  514.     if((v->flags&USEDASADR)||v->nesting==0||v->storage_class==EXTERN||v->storage_class==STATIC)
  515.         return(0);
  516.     for(g=first_fg;g;g=g->normalout){
  517.         if(g==start) g=end->normalout;
  518.         if(!g) break;
  519.         for(p=g->start;p;p=p->next){
  520.             if((p->q1.flags&VAR)&&p->q1.v==v) return(0);
  521.             if((p->q2.flags&VAR)&&p->q2.v==v) return(0);
  522.             if((p->z.flags&VAR)&&p->z.v==v) return(0);
  523.             if(p==g->end) break;
  524.         }
  525.         if(g==end) break;
  526.     }
  527.     return(1);
  528. }
  529.  
  530. int always_reached(struct flowgraph *start,struct flowgraph *end,struct flowgraph *fg,struct IC *z,int ignorecall)
  531. /*  Testet, ob z immer ausgefuehrt wird, falls start in fg ausgefuehrt  */
  532. /*  wird. fg_tmp ist ein Bitvektor, um zu merken, welche Bloecke sicher */
  533. /*  zu z fuehren. Das ganze fuer die Schleife start-end.                */
  534. /*  Wenn ignorecall!=0 ist, wird angenommen, dass jeder CALL            */
  535. /*  zurueckkehrt (das ist nuetzlich fuer loop-unrolling).               */
  536. {
  537.     unsigned char *bmk=fg_tmp;
  538.     struct IC *p;struct flowgraph *g;
  539.     int changed;
  540.  
  541.     for(p=z;p;p=p->prev){
  542.         if(!ignorecall&&p->code==CALL) return(0);
  543.         if(p==fg->start) break;
  544.     }
  545.  
  546.     if(fg==start) return(1);
  547.  
  548.     memset(bmk,0,bsize);
  549.     BSET(bmk,fg->index);
  550.  
  551.     do{
  552.         changed=0;
  553.         for(g=start;g;g=g->normalout){
  554.             if(!BTST(bmk,g->index)){
  555.                 struct flowgraph *n=g->normalout;
  556.                 struct flowgraph *b=g->branchout;
  557.                 if(n||b){
  558.                     if((!b||BTST(bmk,b->index))&&
  559.                        (!n||(g->end&&g->end->code==BRA)||BTST(bmk,n->index))){
  560.                         for(p=g->end;p;p=p->prev){
  561.                             if(!ignorecall&&p->code==CALL) break;
  562.                             if(p==g->start){
  563.                                 if(g==start) return(1);
  564.                                 changed=1; BSET(bmk,g->index);
  565.                                 break;
  566.                             }
  567.                         }
  568.                     }
  569.                 }
  570.             }
  571.             if(g==end) break;
  572.         }
  573.     }while(changed);
  574.     return(0);
  575. }
  576.  
  577. int def_invariant(int vindex,int ignore)
  578. /*  Ermittelt, ob Variable vindex schleifeninvariant unter den Bedingungen  */
  579. /*  rd_defs, inloop und invariant ist.                                      */
  580. /*  Definition ignore wird nicht beachtet. Wenn ignore auf eine gueltige    */
  581. /*  Definition gesetzt wird, kann man somit auf Induktionsvariablen testen  */
  582. /*  (das Ergebnis sagt dann, ob das die einzige Definition in der Schleife  */
  583. /*  ist).                                                                   */
  584. {
  585.     int i,k,j,d=0;
  586. /*    printf("def_inv(%d)=%s(%ld)\n",vindex,vilist[vindex]->identifier,zl2l(vilist[vindex]->offset));*/
  587.     if(!BTST(rd_defs,vindex+dcount+1)){
  588.         memcpy(rd_tmp,rd_defs,dsize);
  589.         bvintersect(rd_tmp,defs[vindex],dsize);
  590.         for(j=1;j<=dcount;j++){
  591.             if(j!=ignore&&BTST(rd_tmp,j)&&BTST(inloop,j)){
  592.                 /*  Mehr als eine moegliche Def. innerhalb der Schleife oder    */
  593.                 /*  eine invariante Def. in der Schleife => nicht invariant.    */
  594.                 if(d) return(0);
  595.                 if(!BTST(moved_completely,j)) return(0);
  596.                 d=1;
  597.             }
  598.         }
  599.     }else{
  600.         for(j=1;j<=dcount;j++){
  601.             if(j!=ignore&&BTST(rd_defs,j)&&BTST(inloop,j)){
  602.                 struct IC *p=dlist[j];
  603.                 for(k=0;k<p->change_cnt;k++){
  604.                     i=p->change_list[k].v->index;
  605.                     if(p->change_list[k].flags&DREFOBJ) i+=vcount-rcount;
  606. /*                    printf("modifies %d\n",i);*/
  607.                     if(i==vindex) break;
  608.                 }
  609.                 if(k>=p->change_cnt) continue;
  610.                 /*  Mehr als eine moegliche Def. innerhalb der Schleife oder    */
  611.                 /*  eine invariante Def. in der Schleife => nicht invariant.    */
  612.                 if(d) return(0);
  613.                 if(!BTST(moved_completely,j)) return(0);
  614.                 d=1;
  615.             }
  616.         }
  617.     }
  618.     return(1);
  619. }
  620.  
  621. void frequency_reduction(struct flowgraph *start,struct flowgraph *end,struct flowgraph *head)
  622. /*  Schleifeninvariante ICs finden und in eine Liste eintragen, falls   */
  623. /*  sie vor die Schleife gezogen werden koennen.                        */
  624. {
  625.     struct IC *p;struct flowgraph *g;
  626.  
  627.  
  628.     int i,changed;
  629.  
  630.     if(head&&start->loopend){
  631.         end=start->loopend;
  632.  
  633.         if(DEBUG&1024){
  634.             printf("searching for loop-invariant code in loop from block %d to %d\n",start->index,end->index);
  635.             printf("head_fg=%d\n",head->index);
  636.         }
  637.         calc_movable(start,end);
  638.         /*  erstmal kein IC invariant   */
  639.         memset(invariant,0,dsize);
  640.  
  641.         /*  kennzeichnen, welche ICs in der Schleife liegen */
  642.         memset(inloop,0,dsize);
  643.         for(g=start;g;g=g->normalout){
  644.             for(p=g->start;p;p=p->next){
  645.                 if(p->defindex) BSET(inloop,p->defindex);
  646.                 if(p==g->end) break;
  647.             }
  648.             if(g==end) break;
  649.         }
  650.  
  651.         do{
  652.             changed=0;
  653.             if(DEBUG&1024) printf("loop-invariant pass\n");
  654.  
  655.             /*  Schleifeninvariante ICs suchen  */
  656.  
  657.             for(g=start;g;g=g->normalout){
  658.                 memcpy(rd_defs,g->rd_in,dsize);
  659.                 for(p=g->start;p;p=p->next){
  660.                     int k1,k2;
  661.                     /*  testen, ob IC neu als invariant markiert werden kann    */
  662.                     if(p->defindex&&p->code!=CALL&&p->code!=GETRETURN&&!BTST(invariant,p->defindex)){
  663.                         if(!BTST(inloop,p->defindex)) ierror(0);
  664.                         if(p->code==ADDRESS||!p->q1.flags||(p->q1.flags&KONST)||(p->q1.flags&VARADR)){
  665.                             k1=1;
  666.                         }else{
  667.                             if(!(p->q1.flags&VAR)) ierror(0);
  668.                             i=p->q1.v->index;
  669.                             if(p->q1.flags&DREFOBJ) i+=vcount-rcount;
  670.                             k1=def_invariant(i,-1);
  671.                         }
  672.                         if(k1){
  673.                             if(!p->q2.flags||(p->q2.flags&KONST)||(p->q2.flags&VARADR)){
  674.                                 k2=1;
  675.                             }else{
  676.                                 if(!(p->q2.flags&VAR)) ierror(0);
  677.                                 i=p->q2.v->index;
  678.                                 if(p->q2.flags&DREFOBJ) i+=vcount-rcount;
  679.                                 k2=def_invariant(i,-1);
  680.                             }
  681.                         }
  682.                         if(k1&&k2){
  683. /*                            if(DEBUG&1024){ printf("found loop-invariant IC:\n");pric2(stdout,p);}*/
  684.                             if(!BTST(moved,p->defindex)&&(always_reached(start,end,g,p,0)||(!dangerous_IC(p)&&used_in_loop_only(start,end,&p->z)))){
  685.                                 if(p->z.flags&DREFOBJ)
  686.                                     k1=def_invariant(p->z.v->index,-1);
  687.                                 else
  688.                                     k1=1;
  689. /*                                if(DEBUG&1024) printf("always reached or used only in loop\n");*/
  690.                                 if(k1&&!BTST(not_movable,p->defindex)){
  691. /*                                    if(DEBUG&1024) printf("movable\n");*/
  692.                                     add_movable(p,head,MOVE_IC);
  693.                                 }else{
  694.                                     if(p->code==ADDRESS||((p->typf&15)<=POINTER&&(p->q2.flags||(p->q1.flags&DREFOBJ)))){
  695. /*                                        if(DEBUG&1024) printf("move computation out of loop\n");*/
  696.                                         add_movable(p,head,MOVE_COMP);
  697.                                     }
  698.                                 }
  699.                             }else{
  700.                             /*  Wenn IC immer erreicht wird oder ungefaehrlich  */
  701.                             /*  ist, kann man zumindest die Operation           */
  702.                             /*  rausziehen, falls das lohnt.                    */
  703.                                 if(!BTST(moved,p->defindex)&&(!dangerous_IC(p)&&(p->typf&15)<=POINTER&&(p->q2.flags||(p->q1.flags&DREFOBJ)||p->code==ADDRESS))){
  704. /*                                    if(DEBUG&1024) printf("move computation out of loop\n");*/
  705.                                     add_movable(p,head,MOVE_COMP);
  706.                                 }
  707.                             }
  708.                             BSET(invariant,p->defindex);
  709.                             changed=1;
  710.                         }
  711.                     }
  712.  
  713.                     /*  Das hier, um rd_defs zu aktualisieren.  */
  714.                     rd_change(p);
  715.  
  716.                     if(p==g->end) break;
  717.                 }
  718.                 if(g==end) break;
  719.             }
  720.         }while(changed);
  721.  
  722.     }
  723.     return;
  724. }
  725. void add_sr(struct IC *p,struct flowgraph *fg,int i_var)
  726. /*  Fuegt IC p, das aus der Schleife in Block fg lineare Fkt. der   */
  727. /*  Induktionsvariable i_var ist, in Liste ein.                     */
  728. /*  Funktioniert als Stack. Da mit aeusseren Schleifen angefangen   */
  729. /*  wird, werden ICs zuerst in inneren Schleifen reduziert. Da ein  */
  730. /*  IC nur einmal reduziert wird, sollte dadurch das Problem eines  */
  731. /*  ICs, das potentiell in mehreren Schleifen reduziert werden      */
  732. /*  koennte, geloest werden.                                        */
  733. {
  734.     struct srlist *new=mymalloc(sizeof(*new));
  735.     new->IC=p;
  736.     new->target_fg=fg;
  737.     new->ind_var=ind_vars[i_var];
  738.     new->next=first_sr;
  739.     new->hv=0;
  740.     first_sr=new;
  741. #if 0
  742.     if(last_sr){
  743.         last_sr->next=new;
  744.         last_sr=new;
  745.     }else{
  746.         first_sr=last_sr=new;
  747.     }
  748. #endif
  749. }
  750. int do_sr(void)
  751. /*  Durchlaufe die Liste aller strength-reduction-Kandidaten und    */
  752. /*  ersetze sie durch neue Induktionsvariablen. Dabei aufpassen,    */
  753. /*  falls ein IC schon von frequency-reduction bearbeitet wurde.    */
  754. /*  Ausserdem wird die Liste freigegeben.                           */
  755. {
  756.     struct IC **fglist; /* Letztes IC vor jedem Block   */
  757.     struct IC *p;
  758.     struct flowgraph *g;
  759.     struct srlist *mf;
  760.     int changed=0;
  761.  
  762.     if(!first_sr) return(0);
  763.  
  764.     if(DEBUG&1024) printf("performing strength-reductions\n");
  765.  
  766.     fglist=mymalloc((basic_blocks+1)*sizeof(*fglist));
  767.     p=0;
  768.     for(g=first_fg;g;g=g->normalout){
  769.         if(g->index>basic_blocks) ierror(0);
  770.         if(g->end) p=g->end;
  771.         fglist[g->index]=p;
  772.     }
  773.  
  774.     while(first_sr){
  775.         struct Var *niv,*nstep;
  776.         struct Typ *t1,*t2;
  777.         struct IC *iv_ic,*new,*m;
  778.         int i,c;
  779.         p=first_sr->IC;
  780.         i=p->defindex;
  781.         /*  Falls IC noch nicht verschoben und noch nicht reduziert wurde.  */
  782.         if(!BTST(moved,i)&&p->code!=ASSIGN){
  783.             if(first_sr->hv){
  784.             /*  Es wurde schon eine aequivalente Operation reduziert, wir   */
  785.             /*  koennen also dieselbe Hilfsvariable benutzen.               */
  786.                 p->code=ASSIGN;
  787.                 p->q1.flags=VAR;
  788.                 p->q1.v=first_sr->hv;
  789.                 p->q1.val.vlong=l2zl(0L);
  790.                 p->q2.flags=0;
  791.                 p->q2.val.vlong=szof(p->z.v->vtyp);
  792.                 /*  Hilfsvariable wird jetzt auch benutzt.  */
  793.                 if(have_alias){
  794.                     void *m=p->use_list;
  795.                     p->use_cnt++;
  796.                     p->use_list=mymalloc(p->use_cnt*VLS);
  797.                     memcpy(&p->use_list[1],m,(p->use_cnt-1)*VLS);
  798.                     free(m);
  799.                     p->use_list[0].v=first_sr->hv;
  800.                     p->use_list[0].flags=0;
  801.                 }
  802.             }else{
  803.                 int minus=0;
  804.                 if(DEBUG&1024){ printf("performing strength-reduction on IC:\n");pric2(stdout,p);}
  805.                 c=p->code;
  806.                 g=first_sr->target_fg;
  807.                 iv_ic=first_sr->ind_var;
  808.                 /*  Merken, wenn IC von der Form SUB x,ind_var->z   */
  809.                 if(c==SUB&&!compare_objs(&p->q2,&iv_ic->z,iv_ic->typf))
  810.                     minus=1;
  811.                 t1=mymalloc(TYPS);
  812.                 t1->flags=p->typf;
  813.                 if(c==ADDI2P||c==SUBIFP){
  814.                     t1->flags=POINTER;
  815.                     t1->next=mymalloc(TYPS);
  816.                     t1->next->flags=VOID;
  817.                     t1->next->next=0;
  818.                 }else t1->next=0;
  819.                 niv=add_var(empty,t1,AUTO,0);
  820.                 /*  Suchen, ob es noch aequivalente Operationen gibt.   */
  821.                 /*  Noch sehr ineffizient...                            */
  822.                 for(mf=first_sr->next;mf;mf=mf->next){
  823.                     if(mf->target_fg==g&&mf->ind_var==iv_ic){
  824.                         m=mf->IC;
  825.                         if(c==m->code&&p->typf==m->typf&&
  826.                            !compare_objs(&p->q1,&m->q1,p->typf)&&
  827.                            !compare_objs(&p->q2,&m->q2,p->typf)){
  828.                             if(mf->hv) ierror(0);
  829.                             mf->hv=niv;
  830.                             if(DEBUG&1024){ printf("equivalent operation\n");pric2(stdout,m);}
  831.                         }
  832.                     }
  833.                 }
  834.                 /*  Initialisierung der Hilfsinduktionsvariablen    */
  835.                 new=mymalloc(ICS);
  836.                 *new=*p;
  837.                 new->z.flags=VAR;
  838.                 new->z.v=niv;
  839.                 new->z.val.vlong=l2zl(0L);
  840.                 /*  IC benutzt dasselbe wie p und aendert nur niv.  */
  841.                 if(have_alias){
  842.                     new->change_cnt=1;
  843.                     new->change_list=mymalloc(VLS);
  844.                     new->change_list[0].v=niv;
  845.                     new->change_list[0].flags=0;
  846.                     new->use_cnt=p->use_cnt;
  847.                     new->use_list=mymalloc(new->use_cnt*VLS);
  848.                     memcpy(new->use_list,p->use_list,new->use_cnt*VLS);
  849.                 }
  850.                 insert_IC_fg(g,fglist[g->index],new);
  851.                 fglist[g->index]=m=new;
  852.                 /*  Ersetzen der Operation durch die Hilfsvariable  */
  853.                 p->code=ASSIGN;
  854.                 p->typf=t1->flags;
  855.                 p->q1=m->z;
  856.                 p->q2.flags=0;
  857.                 p->q2.val.vlong=szof(t1);
  858.                 /*  Benutzt jetzt auch Hilfsvariable.               */
  859.                 if(have_alias){
  860.                     void *mr=p->use_list;
  861.                     p->use_cnt++;
  862.                     p->use_list=mymalloc(p->use_cnt*VLS);
  863.                     memcpy(&p->use_list[1],mr,(p->use_cnt-1)*VLS);
  864.                     free(mr);
  865.                     new->use_list[0].v=niv;
  866.                     new->use_list[0].flags=0;
  867.                 }
  868.                 /*  Berechnen der Schrittweite fuer Hilfsvariable   */
  869.                 if(c==MULT){
  870.                     t2=mymalloc(TYPS);
  871.                     t2->flags=iv_ic->typf;
  872.                     t2->next=0;
  873.                     nstep=add_var(empty,t2,AUTO,0);
  874.                     new=mymalloc(ICS);
  875.                     new->line=iv_ic->line;
  876.                     new->file=iv_ic->file;
  877.                     new->code=MULT;
  878.                     new->typf=p->typf;
  879.                     new->z.flags=VAR;
  880.                     new->z.v=nstep;
  881.                     new->z.val.vlong=l2zl(0L);
  882.                     if(!compare_objs(&m->q1,&iv_ic->z,iv_ic->typf)) new->q1=m->q2;
  883.                         else new->q1=m->q1;
  884.                     if(!compare_objs(&iv_ic->q1,&iv_ic->z,iv_ic->typf)) new->q2=iv_ic->q2;
  885.                         else new->q2=iv_ic->q1;
  886.                     /*  Benutzt dasselbe wie iv_ic und m.   */
  887.                     if(have_alias){
  888.                         new->use_cnt=iv_ic->use_cnt+m->use_cnt;
  889.                         new->use_list=mymalloc(new->use_cnt*VLS);
  890.                         memcpy(new->use_list,iv_ic->use_list,iv_ic->use_cnt*VLS);
  891.                         memcpy(&new->use_list[iv_ic->use_cnt],m->use_list,m->use_cnt*VLS);
  892.                         new->change_cnt=1;
  893.                         new->change_list=mymalloc(VLS);
  894.                         new->change_list[0].v=nstep;
  895.                         new->change_list[0].flags=0;
  896.                     }
  897.                     insert_IC_fg(g,fglist[g->index],new);
  898.                     fglist[g->index]=m=new;
  899.                 }
  900.                 /*  Erhoehen der Hilfsvariable um Schrittweite      */
  901.                 new=mymalloc(ICS);
  902.                 new->line=iv_ic->line;
  903.                 new->file=iv_ic->file;
  904.  
  905.                 new->code=iv_ic->code;
  906.                 if(minus){
  907.                     switch(new->code){
  908.                         case ADD:     new->code=SUB; break;
  909.                         case SUB:     new->code=ADD; break;
  910.                         case ADDI2P:  new->code=SUBIFP; break;
  911.                         case SUBIFP:  new->code=ADDI2P; break;
  912.                     }
  913.                 }
  914.                 if(t1->flags==POINTER){
  915.                     if(new->code==ADD) new->code=ADDI2P;
  916.                     if(new->code==SUB) new->code=SUBIFP;
  917.                 }
  918.                 new->typf=iv_ic->typf;
  919.                 new->q1.flags=VAR;
  920.                 new->q1.v=niv;
  921.                 new->q1.val.vlong=l2zl(0L);
  922.                 new->z=new->q1;
  923.                 if(c==MULT){
  924.                     new->q2=m->z;
  925.                 }else{
  926.                     if(!compare_objs(&iv_ic->q1,&iv_ic->z,iv_ic->typf)) new->q2=iv_ic->q2;
  927.                         else new->q2=iv_ic->q1;
  928.                 }
  929.                 if(have_alias){
  930.                     new->use_cnt=iv_ic->use_cnt+m->use_cnt;
  931.                     new->use_list=mymalloc(new->use_cnt*VLS);
  932.                     memcpy(new->use_list,iv_ic->use_list,iv_ic->use_cnt*VLS);
  933.                     memcpy(&new->use_list[iv_ic->use_cnt],m->use_list,m->use_cnt*VLS);
  934.                     new->change_cnt=1;
  935.                     new->change_list=mymalloc(VLS);
  936.                     new->change_list[0].v=niv;
  937.                     new->change_list[0].flags=0;
  938.                 }
  939.                 /*  Flussgraph muss nur bei den Schleifenkoepfen ok sein.   */
  940.                 insert_IC(iv_ic,new);
  941.                 changed|=2;
  942.             }
  943.         }
  944.         mf=first_sr->next;
  945.         free(first_sr);
  946.         first_sr=mf;
  947.     }
  948.     free(fglist);
  949.     return(changed);
  950. }
  951. void strength_reduction(struct flowgraph *start,struct flowgraph *end,struct flowgraph *head)
  952. /*  Ersetzen von Operationen mit einer Induktionsvariablen und einem    */
  953. /*  schleifeninvarianten Operanden durch eine zusaetzliche              */
  954. /*  Hilfsinduktionsvariable.                                            */
  955. {
  956.     struct flowgraph *g;struct IC *p;
  957.     int i;
  958.     if(DEBUG&1024) printf("performing strength_reduction on blocks %d to %d\n",start->index,end->index);
  959.     for(i=0;i<vcount;i++) ind_vars[i]=0;
  960.     /*  Nach Induktionsvariablen suchen.    */
  961.     for(g=start;g;g=g->normalout){
  962.         memcpy(rd_defs,g->rd_in,dsize);
  963.         for(p=g->start;p;p=p->next){
  964.             int c=p->code;
  965.             if(c==ADD||c==ADDI2P||c==SUB||c==SUBIFP){
  966.                 if(!compare_objs(&p->q1,&p->z,p->typf)){
  967. /*                    if(DEBUG&1024){printf("possible induction:\n");pric2(stdout,p);}*/
  968.                     if(p->q2.flags&VAR){
  969.                         i=p->q2.v->index;
  970.                         if(p->q2.flags&DREFOBJ) i+=vcount-rcount;
  971.                     }
  972.                     if((p->q2.flags&(VAR|VARADR))!=VAR||def_invariant(i,-1)){
  973.                         i=p->z.v->index;
  974.                         if(p->z.flags&DREFOBJ) i+=vcount-rcount;
  975.                         if(def_invariant(i,p->defindex)){
  976. /*                            if(DEBUG&1024) {printf("found basic induction var:\n");pric2(stdout,p);}*/
  977.                             ind_vars[i]=p;
  978.                         }
  979.                     }
  980.                 }
  981.                 if(USEQ2ASZ&&c!=SUB&&c!=SUBIFP&&!compare_objs(&p->q2,&p->z,p->typf)){
  982. /*                    if(DEBUG&1024){printf("possible induction:\n");pric2(stdout,p);}*/
  983.                     if(p->q1.flags&VAR){
  984.                         i=p->q1.v->index;
  985.                         if(p->q1.flags&DREFOBJ) i+=vcount-rcount;
  986.                     }
  987.                     if((p->q1.flags&(VAR|VARADR))!=VAR||def_invariant(i,-1)){
  988.                         i=p->z.v->index;
  989.                         if(p->z.flags&DREFOBJ) i+=vcount-rcount;
  990.                         if(def_invariant(i,p->defindex)){
  991. /*                            if(DEBUG&1024) {printf("found basic induction var:\n");pric2(stdout,p);}*/
  992.                             ind_vars[i]=p;
  993.                         }
  994.                     }
  995.                 }
  996.             }
  997.  
  998.             /*  Das hier, um rd_defs zu aktualisieren.  */
  999.             rd_change(p);
  1000.  
  1001.             if(p==g->end) break;
  1002.         }
  1003.         if(g==end) break;
  1004.     }
  1005.     /*  Nach reduzierbaren Operationen suchen   */
  1006.     for(g=start;g;g=g->normalout){
  1007.         memcpy(rd_defs,g->rd_in,dsize);
  1008.         for(p=g->start;p;p=p->next){
  1009.             if((p->code==MULT||p->code==ADD||p->code==SUB||p->code==ADDI2P||p->code==SUBIFP)&&
  1010.                (((p->typf&15)!=FLOAT&&(p->typf&15)!=DOUBLE)||(c_flags[21]&USEDFLAG)) ){
  1011.                 int k1,k2,iv;
  1012.                 if((p->q1.flags&(VAR|VARADR))==VAR){
  1013.                     i=p->q1.v->index;
  1014.                     if(p->q1.flags&DREFOBJ) i+=vcount-rcount;
  1015.                     if(ind_vars[i]) {k1=1;iv=i;}
  1016.                     else if(def_invariant(i,-1)) k1=2;
  1017.                     else k1=0;
  1018.                 }else k1=2;
  1019.                 if((p->q2.flags&(VAR|VARADR))==VAR){
  1020.                     i=p->q2.v->index;
  1021.                     if(p->q2.flags&DREFOBJ) i+=vcount-rcount;
  1022.                     if(ind_vars[i]) {k2=1;iv=i;}
  1023.                     else if(def_invariant(i,-1)) k2=2;
  1024.                     else k2=0;
  1025.                 }else k2=2;
  1026.                 if(p->z.flags&VAR){
  1027.                 /*  Aufpassen, dass eine Induktion nicht selbst reduziert   */
  1028.                 /*  wird.                                                   */
  1029.                     i=p->z.v->index;
  1030.                     if(p->z.flags&DREFOBJ) i+=vcount-rcount;
  1031.                     if(ind_vars[i]) k1=0;
  1032.                 }
  1033.                 if(k1+k2==3){
  1034. /*                    if(DEBUG&1024) {printf("could perform strength-reduction on:\n");pric2(stdout,p);}*/
  1035.                     add_sr(p,head,iv);
  1036.                 }
  1037.             }
  1038.             /*  Das hier, um rd_defs zu aktualisieren.  */
  1039.             rd_change(p);
  1040.  
  1041.             if(p==g->end) break;
  1042.         }
  1043.         if(g==end) break;
  1044.     }
  1045. }
  1046. void copy_code(struct IC *start,struct IC *end,struct IC *dest,int n)
  1047. /*  Kopiert Code von start bis end n-mal hinter dest. Generiert         */
  1048. /*  entsprechend neue Labels. Allerdings wird der Flussgraph und        */
  1049. /*  aliasing-info nicht angepasst und muss danach neu generiert werden. */
  1050. {
  1051.     int firstl=0,lastl=0,*larray,i,j;
  1052.     struct IC *p,*new;
  1053.     if(DEBUG&1024) printf("copy_code %d times\n",n);
  1054.     /*  Feststellen, welche Labels in der Schleife definiert werden.    */
  1055.     for(p=start;p;p=p->next){
  1056.         if(p->code==LABEL){
  1057.             if(firstl==0||firstl>p->typf) firstl=p->typf;
  1058.             if(lastl ==0|| lastl<p->typf) lastl =p->typf;
  1059.         }
  1060.         if(p==end) break;
  1061.     }
  1062.     if(DEBUG&1024) printf("firstl=%d, lastl=%d\n",firstl,lastl);
  1063.     larray=mymalloc((lastl-firstl+1)*sizeof(*larray));
  1064.     for(i=0;i<=lastl-firstl;i++) larray[i]=0;
  1065.     for(p=start;p;p=p->next){
  1066.         if(p->code==LABEL) larray[p->typf-firstl]=1;
  1067.         if(p==end) break;
  1068.     }
  1069.     /*  Hauptschleife.  */
  1070.     for(i=0;i<n;i++){
  1071.         /*  Neue Labels erzeugen.   */
  1072.         for(j=0;j<=lastl-firstl;j++)
  1073.             if(larray[j]) larray[j]=++label;
  1074.         /*  Code kopieren (rueckwaerts).    */
  1075.         for(p=end;p;p=p->prev){
  1076.             new=mymalloc(ICS);
  1077.             *new=*p;
  1078.             /*  Fuer free_alias.    */
  1079.             new->change_cnt=new->use_cnt=0;
  1080.             /*  Evtl. Label anpassen.   */
  1081.             if(p->code>=LABEL&&p->code<=BRA){
  1082.                 if(p->typf>=firstl&&p->typf<=lastl&&larray[p->typf-firstl])
  1083.                     new->typf=larray[p->typf-firstl];
  1084.             }
  1085.             insert_IC(dest,new);
  1086.             if(p==start) break;
  1087.         }
  1088.     }
  1089.     free(larray);
  1090. }
  1091. void add_ur(int flags,long total,long unroll,struct flowgraph *start,struct flowgraph *head,struct IC *cmp,struct IC *branch,struct IC *ind)
  1092. /*  Fuegt Daten fuer loop-unrolling in Stack ein.                       */
  1093. {
  1094.     struct urlist *new=mymalloc(sizeof(struct urlist));
  1095.     if(DEBUG&1024) printf("add_ur, flags=%d\n",flags);
  1096.     new->flags=flags;
  1097.     new->total=total;
  1098.     new->unroll=unroll;
  1099.     new->start=start;
  1100.     new->head=head;
  1101.     new->cmp=cmp;
  1102.     new->branch=branch;
  1103.     new->ind=ind;
  1104.     new->next=first_ur;
  1105.     first_ur=new;
  1106. }
  1107. int do_unroll(int donothing)
  1108. /*  Fuehrt loop-unrolling durch. Wenn donothing!=0, wird die Liste nur  */
  1109. /*  freigegeben.                                                        */
  1110. {
  1111.     int changed=0; struct urlist *m;
  1112.     while(m=first_ur){
  1113.         int flags=m->flags;
  1114.         long total=m->total,unroll=m->unroll;
  1115.         struct flowgraph *start=m->start,*head=m->head;
  1116.         struct IC *cmp=m->cmp,*branch=m->branch,*ind=m->ind;
  1117.         if(donothing) flags=0;
  1118.         if(flags==UNROLL_COMPLETELY){
  1119.             /*  Schleife komplett ausrollen.    */
  1120.             if(DEBUG&1024) printf("unroll loop completely\n");
  1121.             copy_code(start->start->next,cmp->prev,start->start,total-1);
  1122.             if(DEBUG&1024) printf("removing loop branch\n");
  1123.             remove_IC(branch);
  1124.             if(!cmp->z.flags){
  1125.                 remove_IC(cmp);
  1126.                 if(DEBUG&1024) printf("removing loop compare\n");
  1127.             }
  1128.             changed|=1;
  1129.         }
  1130.         if(flags==UNROLL_MODULO){
  1131.             /*  Schleife teilweise ausrollen.   */
  1132.             if(DEBUG&1024) printf("unroll loop partially, n=%ld,r=%ld\n",unroll,total%unroll);
  1133.             if(unroll>1){
  1134.                 copy_code(start->start->next,cmp->prev,head->start,total%unroll);
  1135.                 copy_code(start->start->next,cmp->prev,start->start,unroll-1);
  1136.                 changed|=1;
  1137.             }
  1138.         }
  1139.         if(flags==UNROLL_INVARIANT){
  1140.             struct IC *new; struct Var *v; int out=++label;
  1141.             long i; struct Typ *t;
  1142.             if(DEBUG&1024) printf("unrolling non-constant loop\n");
  1143.             if(cmp->q1.flags&VAR) t=cmp->q1.v->vtyp; else t=cmp->q2.v->vtyp;
  1144.             v=add_var(empty,clone_typ(t),AUTO,0);
  1145.             /*  branch dient hier teilweise als leere Schablone.    */
  1146.             /*  Label an Schleifenausgang setzen.   */
  1147.             new=mymalloc(ICS); *new=*branch;
  1148.             new->change_cnt=new->use_cnt=0;
  1149.             new->code=LABEL;
  1150.             new->typf=out;
  1151.             insert_IC(branch,new);
  1152.             /*  Test vor die unroll-Variante.   */
  1153.             new=mymalloc(ICS); *new=*branch;
  1154.             new->change_cnt=new->use_cnt=0;
  1155.             if(branch->code==BLT) new->code=BGE;
  1156.             if(branch->code==BLE) new->code=BGT;
  1157.             if(branch->code==BGT) new->code=BLE;
  1158.             if(branch->code==BGE) new->code=BLT;
  1159.             if(branch->code==BEQ) new->code=BNE;
  1160.             if(branch->code==BNE) new->code=BEQ;
  1161.             new->typf=out;
  1162.             insert_IC(head->start,new);
  1163.             new=mymalloc(ICS); *new=*cmp;
  1164.             new->change_cnt=new->use_cnt=0;
  1165.             insert_IC(head->start,new);
  1166.             /*  Einsprungpunkte fuer die Modulos.   */
  1167.             for(i=1;i<unroll;i++){
  1168.                 copy_code(start->start->next,cmp->prev,head->start,1);
  1169.                 new=mymalloc(ICS); *new=*branch;
  1170.                 new->change_cnt=new->use_cnt=0;
  1171.                 new->code=LABEL;
  1172.                 new->typf=label+i+1;
  1173.                 insert_IC(head->start,new);
  1174.             }
  1175.             /*  Testen, welches Modulo. */
  1176.             for(i=unroll-2;i>=0;i--){
  1177.                 new=mymalloc(ICS); *new=*branch;
  1178.                 new->change_cnt=new->use_cnt=0;
  1179.                 new->code=BEQ;
  1180.                 if(i>0) new->typf=label+i+1;
  1181.                    else new->typf=start->start->typf;
  1182.                 insert_IC(head->start,new);
  1183.                 new=mymalloc(ICS); *new=*branch;
  1184.                 new->change_cnt=new->use_cnt=0;
  1185.                 if(SWITCHSUBS) new->q1.val.vlong=l2zl(1L);
  1186.                     else       new->q1.val.vlong=l2zl(i);
  1187.                 eval_const(&new->q1.val,LONG);
  1188.                 new->q1.flags=VAR;
  1189.                 new->q1.v=v;
  1190.                 new->q1.val.vlong=l2zl(0L);
  1191.                 new->typf=t->flags;
  1192.                 if(SWITCHSUBS||i==0){
  1193.                     new->code=TEST;
  1194.                     insert_IC(head->start,new);
  1195.                     if(i>0){
  1196.                         new=mymalloc(ICS);
  1197.                         *new=*head->start->next;
  1198.                         new->change_cnt=new->use_cnt=0;
  1199.                         new->code=SUB;
  1200.                         new->z=new->q1;
  1201.                         new->q2.flags=KONST;
  1202.                         insert_const2(&new->q2.val,new->typf&31);
  1203.                         insert_IC(head->start,new);
  1204.                     }
  1205.                 }else{
  1206.                     new->code=COMPARE;
  1207.                     new->q2.flags=KONST;
  1208.                     insert_const2(&new->q2.val,new->typf&31);
  1209.                     insert_IC(head->start,new);
  1210.                 }
  1211.             }
  1212.             /*  Durchlaeufe modulo unroll berechnen.    */
  1213.             new=mymalloc(ICS); *new=*branch;
  1214.             new->change_cnt=new->use_cnt=0;
  1215.             new->code=AND;
  1216.             new->typf=t->flags;
  1217.             new->q1.flags=VAR;
  1218.             new->q1.v=v;
  1219.             new->q1.val.vlong=l2zl(0L);
  1220.             new->z=new->q1;
  1221.             new->q2.flags=KONST;
  1222.             new->q2.val.vlong=l2zl(unroll-1);
  1223.             eval_const(&new->q2.val,LONG);
  1224.             insert_const2(&new->q2.val,new->typf);
  1225.             new->z=new->q1;
  1226.             insert_IC(head->start,new);
  1227.             new=mymalloc(ICS); *new=*ind;
  1228.             new->change_cnt=new->use_cnt=0;
  1229.             new->code=DIV;
  1230.             new->q1=head->start->next->z;
  1231.             new->z=new->q1;
  1232.             insert_IC(head->start,new);
  1233.             new=mymalloc(ICS); *new=*head->start->next;
  1234.             new->change_cnt=new->use_cnt=0;
  1235.             new->code=SUB;
  1236.             if(!compare_objs(&ind->z,&cmp->q1,new->typf)){
  1237.                 if(ind->code==ADD){new->q1=cmp->q2;new->q2=ind->z;}
  1238.                     else          {new->q2=cmp->q2;new->q2=ind->z;}
  1239.             }else{
  1240.                 if(ind->code==ADD){new->q1=cmp->q1;new->q2=ind->z;}
  1241.                     else          {new->q2=cmp->q1;new->q2=ind->z;}
  1242.             }
  1243.             insert_IC(head->start,new);
  1244.             copy_code(start->start->next,cmp->prev,start->start,unroll-1);
  1245.             label+=unroll;
  1246.             changed|=2;
  1247.         }
  1248.         first_ur=m->next;
  1249.         free(m);
  1250.     }
  1251.     return(changed);
  1252. }
  1253. void unroll(struct flowgraph *start,struct flowgraph *head)
  1254. /*  Versucht loop-unrolling.                                            */
  1255. {
  1256.     struct flowlist *lp;struct flowgraph *end,*g;struct IC *p,*m,*branch,*cmp;
  1257.     struct obj *o,*e,*cc; union atyps init_val,end_val,step_val;
  1258.     unsigned char *tmp;
  1259.     long dist,step,ic_cnt,n;
  1260.     int bflag=0,t=0,i,flags=0; /* 1: sub, 2: init_val gefunden  */
  1261.     end=start->loopend;
  1262.     if(DEBUG&1024) printf("checking for possible unrolling from %d to %d\n",start->index,end->index);
  1263.     for(lp=start->in;lp;lp=lp->next)
  1264.         if(lp->graph->index>start->index&&lp->graph->index<=end->index&&lp->graph!=end) return;
  1265.     if(DEBUG&1024) printf("only one backward-branch\n");
  1266.     e=0; p=end->end;
  1267.     do{
  1268.         if(p->code>=BEQ&&p->code<BRA){ branch=p;bflag=p->code;cc=&p->z; }
  1269.         if(p->code==TEST){
  1270.             if(compare_objs(cc,&p->z,p->typf)) return;
  1271.             o=&p->q1;t=p->typf;cmp=p;
  1272.             end_val.vlong=l2zl(0L); eval_const(&end_val,LONG);
  1273.             insert_const2(&end_val,t);
  1274.             break;
  1275.         }
  1276.         if(p->code==COMPARE){
  1277.             if(compare_objs(cc,&p->z,p->typf)) return;
  1278.             cmp=p;
  1279.             if(p->q1.flags&VAR){
  1280.                 if(ind_vars[p->q1.v->index]){
  1281.                     o=&p->q1;t=p->typf;
  1282.                     e=&p->q2;
  1283.                     break;
  1284.                 }
  1285.             }
  1286.             if(p->q2.flags&VAR){
  1287.                 if(ind_vars[p->q2.v->index]){
  1288.                     o=&p->q2;t=p->typf;
  1289.                     e=&p->q1;
  1290.                     if(bflag==BLT) bflag=BGT;
  1291.                     if(bflag==BLE) bflag=BGE;
  1292.                     if(bflag==BGT) bflag=BLT;
  1293.                     if(bflag==BGE) bflag=BLE;
  1294.                     break;
  1295.                 }
  1296.             }
  1297.             return;
  1298.         }
  1299.         if(p==end->start) return;
  1300.         p=p->prev;
  1301.     }while(p);
  1302.     if(!e||e->flags&KONST){
  1303.         if(e) end_val=e->val;
  1304.         if(DEBUG&1024) printf("end condition is constant\n");
  1305.     }else{
  1306.         if(!(e->flags&VAR)) return;
  1307.         i=e->v->index;
  1308.         if(e->flags&DREFOBJ) i+=vcount-rcount;
  1309.         if(DEBUG&1024) printf("testing end-condition\n");
  1310.         memcpy(rd_defs,end->rd_in,dsize);
  1311.         for(m=end->start;m;m=m->next){
  1312.             if(m==cmp){
  1313.                 if(DEBUG&1024) pric2(stdout,m);
  1314.                 if(!def_invariant(i,-1)) return;
  1315.                 if(DEBUG&1024) printf("end condition loop-invariant\n");
  1316.                 break;
  1317.             }
  1318.             rd_change(m);
  1319.             if(m==end->end) ierror(0);
  1320.         }
  1321.     }
  1322.     p=ind_vars[o->v->index];
  1323.     if(!p) return;
  1324.     if(compare_objs(o,&p->z,t)) return;
  1325.     if(DEBUG&1024) printf("loop condition only dependant on induction var\n");
  1326.     if(!(p->q2.flags&KONST)) return;
  1327.     if(DEBUG&1024) printf("induction is constant\n");
  1328.     for(ic_cnt=0,g=start;g;g=g->normalout){
  1329.         for(m=g->start;m;m=m->next){
  1330.             if(m==p&&!always_reached(start,end,g,p,1)) return;
  1331.             ic_cnt++;
  1332.             if(m==g->end) break;
  1333.         }
  1334.         if(g==end) break;
  1335.     }
  1336.     ic_cnt-=2;  /*  Branch und Test */
  1337.     if(DEBUG&1024) printf("induction always reached\n");
  1338.     if(DEBUG&1024) printf("ICs in loop: %ld\n",ic_cnt);
  1339.     step_val=p->q2.val;
  1340.     if(p->code==SUB) flags|=1;
  1341.     if(e&&!(e->flags&KONST)){
  1342.         /*  Anzahl der Schleifendurchlaeufe kann beim Eintritt in die   */
  1343.         /*  Schleife zur Laufzeit berechnet werden.                     */
  1344. /*        add_ur(UNROLL_INVARIANT,0,4,start,head,cmp,branch,p);*/
  1345.         return;
  1346.     }
  1347.     i=p->z.v->index;
  1348.     if(p->z.flags&DREFOBJ) i+=vcount-rcount;
  1349.     tmp=mymalloc(dsize);
  1350.     memcpy(tmp,head->rd_out,dsize);
  1351.     if(BTST(tmp,i+dcount+1)) return; /*  keine eind. Def.    */
  1352.     bvintersect(tmp,defs[i],dsize);
  1353.     for(i=1;i<=dcount;i++){
  1354.         if(BTST(tmp,i)){
  1355.             if(DEBUG&1024){ printf("possible init:\n");pric2(stdout,dlist[i]);}
  1356.             if((flags&2)||dlist[i]->code!=ASSIGN||!(dlist[i]->q1.flags&KONST)){
  1357.                 free(tmp);return;
  1358.             }
  1359.             init_val=dlist[i]->q1.val;
  1360.             flags|=2;
  1361.         }
  1362.     }
  1363.     free(tmp);
  1364.     if(!(flags&2)) return;
  1365.     if(DEBUG&1024){
  1366.         printf("loop number determinable\n");
  1367.         printf("init_val: ");printval(stdout,&init_val,t,1);
  1368.         printf("\nend_val: ");printval(stdout,&end_val,t,1);
  1369.         printf("\nstep_val: ");printval(stdout,&step_val,t,1);
  1370.         printf("\nflags=%d bflag=%d\n",flags,bflag);
  1371.     }
  1372.     /*  Nur integers als Induktionsvariablen.   */
  1373.     if((t&15)>LONG) return;
  1374.     /*  Distanz und Step werden als long behandelt, deshalb pruefen, ob */
  1375.     /*  alles im Bereich des garantierten Mindestwerte fuer long.       */
  1376.     /*  Wenn man hier die Arithmetik der Zielmaschine benutzen wuerde,  */
  1377.     /*  koennte man theoretisch mehr Faelle erkennen, aber das waere    */
  1378.     /*  recht popelig und man muss sehr aufpassen.                      */
  1379.     if(t&UNSIGNED){
  1380.         eval_const(&end_val,t);
  1381.         if(!zulleq(vulong,l2zl(2147483647))) return;
  1382.         dist=zul2ul(vulong);
  1383.         eval_const(&init_val,t);
  1384.         if(!zulleq(vulong,l2zl(2147483647))) return;
  1385.         dist-=zul2ul(vulong);
  1386.         eval_const(&step_val,t);
  1387.         if(!zulleq(vulong,l2zl(2147483647))) return;
  1388.         step=zul2ul(vulong);
  1389.     }else{
  1390.         eval_const(&end_val,t);
  1391.         if(!zlleq(vlong,l2zl(2147483647/2))) return;
  1392.         if(zlleq(vlong,l2zl(-2147483647/2))) return; /*  eins weniger als moeglich waere */
  1393.         dist=zl2l(vlong);
  1394.         eval_const(&init_val,t);
  1395.         if(!zlleq(vlong,l2zl(2147483647/2))) return;
  1396.         if(zlleq(vlong,l2zl(-2147483647/2))) return; /*  eins weniger als moeglich waere */
  1397.         dist-=zl2l(vlong);
  1398.         eval_const(&step_val,t);
  1399.         if(!zlleq(vlong,l2zl(2147483647))) return;
  1400.         if(zlleq(vlong,l2zl(-2147483647))) return; /*  eins weniger als moeglich waere */
  1401.         step=zl2l(vlong);
  1402.     }
  1403.     if(flags&1) step=-step;
  1404.     if(DEBUG&1024) printf("dist=%ld, step=%ld\n",dist,step);
  1405.     if(step==0) ierror(0);
  1406.     /*  Die Faelle kann man noch genauer untersuchen, ob die Schleife   */
  1407.     /*  evtl. nur einmal durchlaufen wird o.ae.                         */
  1408.     if(step<0&&dist>=0){
  1409.         if(report_suspicious_loops){ error(208);report_suspicious_loops=0;}
  1410.         return;
  1411.     }
  1412.     if(step>0&&dist<=0){
  1413.         if(report_suspicious_loops){ error(208);report_suspicious_loops=0;}
  1414.         return;
  1415.     }
  1416.     if(bflag==BEQ){
  1417.         if(report_suspicious_loops){ error(208);report_suspicious_loops=0;}
  1418.         return;
  1419.     }
  1420.     /*  Aufpassen, ob das Schleifenende bei BNE auch getroffen wird.    */
  1421.     if(bflag==BNE){
  1422.         if(dist%step){
  1423.             if(report_suspicious_loops){ error(208);report_suspicious_loops=0;}
  1424.             return;
  1425.         }
  1426.     }
  1427.     if(bflag==BLT||bflag==BGT||bflag==BNE){
  1428.         if(step>0) dist--; else dist++;
  1429.     }
  1430.     if(dist/step<0) ierror(0);
  1431.     if(DEBUG&1024) printf("loop is executed %ld times\n",dist/step+1);
  1432.     if(start->start->code!=LABEL) ierror(0);
  1433.     if(ic_cnt*(dist/step+1)<=c_flags_val[25].l){
  1434.         /*  Schleife komplett ausrollen.    */
  1435.         add_ur(UNROLL_COMPLETELY,dist/step+1,dist/step+1,start,head,cmp,branch,p);
  1436.     }else{
  1437.         /*  Schleife teilweise ausrollen.   */
  1438.         n=(c_flags_val[25].l-ic_cnt-2)/(2*ic_cnt);
  1439.         add_ur(UNROLL_MODULO,dist/step+1,n,start,head,cmp,branch,p);
  1440.     }
  1441. }
  1442.  
  1443. int loop_optimizations(struct flowgraph *fg)
  1444. /*  steuert Optimierungen in Schleifen  */
  1445. {
  1446.     int changed=0,i;
  1447.     struct flowgraph *g,*last;
  1448.     if(DEBUG&1024) print_flowgraph(fg);
  1449.     if(loops(fg,0)==0) return(0);
  1450.     if(DEBUG&1024) print_flowgraph(fg);
  1451.     first_fg=fg=create_loop_headers(fg,0);
  1452.     if(DEBUG&1024) print_flowgraph(fg);
  1453.     num_defs();
  1454.  
  1455.     bsize=(basic_blocks+CHAR_BIT)/CHAR_BIT;
  1456.     fg_tmp=mymalloc(bsize);
  1457.     ind_vars=mymalloc(vcount*sizeof(*ind_vars));
  1458.     invariant=mymalloc(dsize);
  1459.     inloop=mymalloc(dsize);
  1460.     rd_defs=mymalloc(dsize);
  1461.     rd_tmp=mymalloc(dsize);
  1462.     rd_mode=1;
  1463.     reaching_definitions(fg);
  1464.     if(DEBUG&1024) print_flowgraph(fg);
  1465.     moved=mymalloc(dsize);
  1466.     memset(moved,0,dsize);
  1467.     moved_completely=mymalloc(dsize);
  1468.     memset(moved_completely,0,dsize);
  1469.     not_movable=mymalloc(dsize);
  1470.  
  1471.     first_mov=last_mov=0;
  1472.     first_sr=last_sr=0;
  1473.  
  1474.     for(last=0,g=fg;g;g=g->normalout){
  1475.         if(g->loopend){
  1476.             frequency_reduction(g,g->loopend,last);
  1477.             strength_reduction(g,g->loopend,last);
  1478.             if(c_flags_val[0].l&2048) unroll(g,last);
  1479.         }
  1480.         last=g;
  1481.     }
  1482.  
  1483.     for(i=0;i<vcount;i++) free(defs[i]);
  1484.     free(defs);
  1485.     free(dlist);
  1486.     free(rd_globals);
  1487.     free(rd_statics);
  1488.     free(rd_address);
  1489.     free(rd_drefs);
  1490.     free(rd_parms);
  1491.     free(rd_defs);
  1492.     free(rd_tmp);
  1493.     free(rd_vars);
  1494.  
  1495.     free(invariant);
  1496.     free(inloop);
  1497.  
  1498.     changed|=move_to_head();
  1499.     changed|=do_sr();
  1500.     changed|=do_unroll(changed);
  1501.  
  1502.     free(moved);
  1503.     free(not_movable);
  1504.     free(moved_completely);
  1505.  
  1506.     if(changed&2){
  1507.         if(DEBUG&1024) printf("must repeat num_vars\n");
  1508.         free(vilist);
  1509.         free(av_globals);free(av_statics);
  1510.         free(av_drefs);free(av_address);
  1511.         num_vars();
  1512.     }
  1513.  
  1514.     free(ind_vars);
  1515.     free(fg_tmp);
  1516.  
  1517.     return(changed);
  1518. }
  1519.  
  1520.